
Live Google Dataset Search Advanced


Live Google Dataset Search provides real-time data on the top 20 results from the Google Dataset Search engine. The results are specific to the keyword you indicate; other parameters are optional.

Instead of ‘login’ and ‘password’, use your credentials from https://app.dataforseo.com/api-dashboard

<?php
// You can download this file from here https://cdn.dataforseo.com/v3/examples/php/php_RestClient.zip
require('RestClient.php');
$api_url = 'https://api.dataforseo.com/';
try {
    // Instead of 'login' and 'password' use your credentials from https://app.dataforseo.com/api-dashboard
    $client = new RestClient($api_url, null, 'login', 'password');
} catch (RestClientException $e) {
    echo "n";
    print "HTTP code: {$e->getHttpCode()}n";
    print "Error code: {$e->getCode()}n";
    print "Message: {$e->getMessage()}n";
    print  $e->getTraceAsString();
    echo "n";
    exit();
}
$post_array = array();
// You can set only one task at a time
$post_array[] = array(
    "keyword" => "water quality",
    "last_updated" => "1m",
    "file_formats" => [ "archive", "image" ],
    "usage_rights" => "noncommercial",
    "is_free" => true,
    "topics" => [ "natural_sciences", "geo" ]
);
try {
    // POST /v3/serp/google/dataset_search/live/advanced
    // in addition to 'google' and 'dataset_search' you can also set other search engine and type parameters
    // the full list of possible parameters is available in documentation
    $result = $client->post('/v3/serp/google/dataset_search/live/advanced', $post_array);
    print_r($result);
    // do something with post result
} catch (RestClientException $e) {
    echo "n";
    print "HTTP code: {$e->getHttpCode()}n";
    print "Error code: {$e->getCode()}n";
    print "Message: {$e->getMessage()}n";
    print  $e->getTraceAsString();
    echo "n";
}
$client = null;
?>

The above command returns JSON structured like this:

{
  "version": "0.1.20221214",
  "status_code": 20000,
  "status_message": "Ok.",
  "time": "2.0795 sec.",
  "cost": 0.002,
  "tasks_count": 1,
  "tasks_error": 0,
  "tasks": [
    {
      "id": "01161741-1535-0139-0000-5eb67a6e8212",
      "status_code": 20000,
      "status_message": "Ok.",
      "time": "2.0246 sec.",
      "cost": 0.002,
      "result_count": 1,
      "path": [
        "v3",
        "serp",
        "google",
        "dataset_search",
        "live",
        "advanced"
      ],
      "data": {
        "api": "serp",
        "function": "live",
        "se": "google",
        "se_type": "dataset_search",
        "keyword": "water quality",
        "last_updated": "1m",
        "file_formats": [
          "archive",
          "image"
        ],
        "usage_rights": "noncommercial",
        "is_free": true,
        "topics": [
          "natural_sciences",
          "geo"
        ],
        "device": "desktop",
        "os": "windows"
      },
      "result": [
        {
          "keyword": "water quality",
          "se_domain": "datasetsearch.research.google.com",
          "language_code": "en",
          "check_url": "https://datasetsearch.research.google.com/search?query=water%20quality&hl=en&filters=WyJbXCJ1cGRhdGVkX2RhdGVcIixbXCIxbVwiXV0iLCJbXCJmaWxlX2Zvcm1hdF9jbGFzc1wiLFtcIjdcIixcIjVcIl1dIiwiW1wibGljZW5zZV9jbGFzc1wiLFtcIm5vbmNvbW1lcmNpYWxcIl1dIiwiW1wiaXNfYWNjZXNzaWJsZV9mb3JfZnJlZVwiLFtdXSIsIltcImZpZWxkX29mX3N0dWR5XCIsW1wibmF0dXJhbF9zY2llbmNlc1wiLFwiZ2VvXCJdXSJd",
          "datetime": "2023-01-16 15:41:03 +00:00",
          "spell": null,
          "item_types": [
            "dataset"
          ],
          "se_results_count": 11,
          "items_count": 11,
          "items": [
            {
              "type": "dataset",
              "rank_group": 1,
              "rank_absolute": 1,
              "position": "left",
              "xpath": null,
              "dataset_id": "L2cvMTFwYzA4cmhqeg==",
              "title": "Logan River Observatory: South Logan Benson Canal at Benson Irrigation Company Flume, 2300 North 600 West Aquatic Site (SLB_600W_CNL) Quality Controlled Data",
              "image_url": null,
              "scholarly_citations_count": null,
              "links": [
                {
                  "type": "link_element",
                  "title": "hydroshare.org",
                  "description": null,
                  "url": "http://www.hydroshare.org/",
                  "domain": "www.hydroshare.org"
                },
                {
                  "type": "link_element",
                  "title": "dataone.org",
                  "description": null,
                  "url": "http://search.dataone.org/",
                  "domain": "search.dataone.org"
                }
              ],
              "dataset_providers": [
                {
                  "type": "dataset_providers_element",
                  "title": "HydroShare",
                  "url": null,
                  "domain": null
                }
              ],
              "formats": [
                {
                  "type": "formats_element",
                  "format": "zip",
                  "size": null
                }
              ],
              "authors": [
                {
                  "type": "authors_element",
                  "name": "Logan River Observatory",
                  "url": null,
                  "domain": null
                }
              ],
              "licenses": [
                {
                  "type": "licenses_element",
                  "title": "Attribution 4.0 (CC BY 4.0)",
                  "url": "https://creativecommons.org/licenses/by/4.0/",
                  "domain": "creativecommons.org"
                }
              ],
              "updated_date": "2022-12-27 02:00:00 +00:00",
              "area_covered": [
                "2300 North 600 West",
                "South Logan Benson Canal at Benson Irrigation Company Flume",
                "Logan",
                "North America",
                "Rocky Mountains"
              ],
              "period_covered": null,
              "dataset_description": {
                "text": "This dataset contains quality control level 1 (QC1) data for all of the variables measured for the aquatic site on the South Logan Benson Canal at Benson Irrigation Company Flume, 2300 North 600 West (SLB_600W_CNL). Each file contains all available QC1 data for a specific variable. Files will be updated as new data become available, but no more than once daily. These data have passed QA/QC procedures such as sensor calibration and visual inspection and removal of obvious errors. These data are approved by Technicians as the best available version of the data. See published script for correction steps specific to this data series. Each file header contains detailed metadata for site information, variable and method information, source information, and qualifiers referenced in the data. This site is currently operated as part of the Logan River Observatory.\n",
                "links": null
              }
            },
            {
              "type": "dataset",
              "rank_group": 2,
              "rank_absolute": 2,
              "position": "left",
              "xpath": null,
              "dataset_id": "L2cvMTFuMDQ3X3B6aA==",
              "title": "Lake Simcoe Monitoring",
              "image_url": null,
              "scholarly_citations_count": 31,
              "links": [
                {
                  "type": "link_element",
                  "title": "canada.ca",
                  "description": null,
                  "url": "http://open.canada.ca/",
                  "domain": "open.canada.ca"
                },
                {
                  "type": "link_element",
                  "title": "arctic-sdi.org",
                  "description": null,
                  "url": "http://catalogue.arctic-sdi.org/",
                  "domain": "catalogue.arctic-sdi.org"
                }
              ],
              "dataset_providers": [
                {
                  "type": "dataset_providers_element",
                  "title": "Government of Ontario",
                  "url": null,
                  "domain": null
                }
              ],
              "formats": [
                {
                  "type": "formats_element",
                  "format": "pdf",
                  "size": null
                },
                {
                  "type": "formats_element",
                  "format": "html",
                  "size": null
                },
                {
                  "type": "formats_element",
                  "format": "zip",
                  "size": null
                }
              ],
              "authors": null,
              "licenses": [
                {
                  "type": "licenses_element",
                  "title": "Open Government Licence - Canada 2.0",
                  "url": "https://open.canada.ca/en/open-government-licence-canada",
                  "domain": "open.canada.ca"
                }
              ],
              "updated_date": "2022-12-30 02:00:00 +00:00",
              "area_covered": null,
              "period_covered": {
                "start_date": "1980-01-01 03:00:00 +00:00",
                "end_date": "2021-12-31 02:00:00 +00:00",
                "displayed_date": "Jan 1, 1980 - Dec 31, 2021"
              },
              "dataset_description": {
                "text": "The Lake Simcoe lake monitoring program provides measurements of chemical and physical water quality limits such as total phosphorus, nitrogen, chlorophyll a, pH, alkalinity, conductivity, dissolved organic and inorganic carbon, silica, other ions, water transparency, temperature and dissolved oxygen. Samples are collected biweekly during the spring, summer and fall. *[pH]: potential of hydrogen\n",
                "links": null
              }
            }
          ]
        }
      ]
    }
  ]
}

All POST data should be sent in JSON format (UTF-8 encoding). When setting a task, send all task parameters in the task array of the generic POST array. You can send up to 2000 API calls per minute; each Live SERP API call can contain only one task.
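
For reference, the task array from the PHP example above serializes to a request body shaped like the following (a sketch of the payload structure, not literal client output):

[
    {
        "keyword": "water quality",
        "last_updated": "1m",
        "file_formats": ["archive", "image"],
        "usage_rights": "noncommercial",
        "is_free": true,
        "topics": ["natural_sciences", "geo"]
    }
]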

Below you will find a detailed description of the fields you can use for setting a task.

Description of the fields for setting a task:

Field name Type Description
keyword string keyword
required field
you can specify up to 700 symbols in the keyword field
all %## will be decoded (plus symbol ‘+’ will be decoded to a space character)
if you need to use the “%” symbol for your keyword, please specify it as “%25”;
if you need to use the “+” symbol for your keyword, please specify it as “%2B”;
language_name string full name of search engine language
optional field
if you use this field, you don’t need to specify language_code
only value:
English
language_code string search engine language code
optional field
if you use this field, you don’t need to specify language_name
only value:
en
device string device type
optional field
only value: desktop
os string device operating system
optional field
choose from the following values: windows, macos
default value: windows
depth integer parsing depth
optional field
number of results in SERP
default value: 20
max value: 700
Note: your account will be billed per each SERP containing up to 20 results;
thus, setting a depth above 20 may result in additional charges if the search engine returns more than 20 results;
if the specified depth is higher than the number of results in the response, the difference will be refunded automatically to your account balance
last_updated string last time the dataset was updated
optional field
possible values: 1m, 1y, 3y
file_formats array file formats of the dataset
optional field
possible values: other, archive, text, image, document, tabular
usage_rights string usage rights of the dataset
optional field
possible values: commercial, noncommercial
is_free boolean indicates whether displayed datasets are free
optional field
possible values: true, false
topics array dataset topics
optional field
possible values: humanities, social_sciences, life_sciences, agriculture, natural_sciences, geo, computer, architecture_and_urban_planning, engineering
tag string user-defined task identifier
optional field
the character limit is 255
you can use this parameter to identify the task and match it with the result
you will find the specified tag value in the data object of the response
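
As an illustration, a single task combining several of the optional fields above might be set up as follows (a sketch only; the values are examples, not defaults):

<?php
// Illustrative task array using optional fields from the table above;
// all values below are examples, not defaults.
$post_array = array();
$post_array[] = array(
    "keyword" => "air quality",        // required, up to 700 symbols
    "language_code" => "en",           // alternatively: "language_name" => "English"
    "device" => "desktop",             // only supported value
    "os" => "macos",                   // windows (default) or macos
    "depth" => 40,                     // billed per each SERP containing up to 20 results
    "last_updated" => "1y",            // 1m, 1y, or 3y
    "file_formats" => [ "tabular", "document" ],
    "usage_rights" => "commercial",    // commercial or noncommercial
    "is_free" => false,
    "topics" => [ "engineering", "computer" ],
    "tag" => "dataset-search-example"  // up to 255 characters, echoed back in the data object
);
?>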

As a response from the API server, you will receive JSON-encoded data containing a tasks array with the information specific to the set tasks.
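
Before processing the results, it is worth checking both the general status code and each task’s status code. A minimal sketch, assuming $response holds the decoded associative array returned by the client:

<?php
// A sketch of basic status handling; assumes $response is the decoded
// associative array returned by the RestClient call shown earlier.
if ($response['status_code'] !== 20000) {
    // general API-level error (see the full list of response codes)
    exit("API error {$response['status_code']}: {$response['status_message']}\n");
}
if ($response['tasks_error'] > 0) {
    foreach ($response['tasks'] as $task) {
        if ($task['status_code'] !== 20000) {
            // task status codes fall within the 10000-60000 range
            error_log("Task {$task['id']} failed: {$task['status_message']}");
        }
    }
}
?>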

Description of the fields in the results array:

Field name Type Description
version string the current version of the API
status_code integer general status code
you can find the full list of the response codes here
Note: we strongly recommend designing a necessary system for handling related exceptional or error conditions
status_message string general informational message
you can find the full list of general informational messages here
time string execution time, seconds
cost float total tasks cost, USD
tasks_count integer the number of tasks in the tasks array
tasks_error integer the number of tasks in the tasks array returned with an error
tasks array array of tasks
        id string task identifier
unique task identifier in our system in the UUID format
        status_code integer status code of the task
generated by DataForSEO; can be within the following range: 10000-60000
you can find the full list of the response codes here
        status_message string informational message of the task
you can find the full list of general informational messages here
        time string execution time, seconds
        cost float cost of the task, USD
        result_count integer number of elements in the result array
        path array URL path
        data object contains the same parameters that you specified in the POST request
        result array array of results
            keyword string keyword received in a POST array
the keyword is returned with decoded %## (plus symbol ‘+’ will be decoded to a space character)
            se_domain string search engine domain in a POST array
            language_code string language code in a POST array
            check_url string direct URL to search engine results
you can use it to make sure that we provided accurate results
            datetime string date and time when the result was received
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2019-11-15 12:57:46 +00:00
            spell object autocorrection of the search engine
if the search engine provided results for a keyword that was corrected, we will specify the keyword corrected by the search engine and the type of autocorrection
            item_types array types of search results in SERP
contains types of search results (items) found in SERP.
possible item type: dataset
            se_results_count integer total number of results in SERP
            items_count integer the number of results returned in the items array
            items array elements of search results found in SERP
                type string type of element = ‘dataset’
                rank_group integer group rank in SERP
position within a group of elements with identical type values
positions of elements with different type values are omitted from rank_group
                rank_absolute integer absolute rank in SERP
absolute position among all the elements in SERP
                position string the alignment of the element in SERP
can take the following values:
left, right
                xpath string the XPath of the element
                dataset_id string ID of the dataset
                title string title of the result in SERP
                image_url string URL of the image
the URL leading to the image on the original resource or DataForSEO storage (in case the original source is not available)
                scholarly_citations_count integer count of articles that refer to the dataset
                links array sitelinks
the links shown below some of Google Dataset Search results
if there are none, equals null
                    type string type of element = ‘link_element’
                    title string title of the result in SERP
                    description string description of the results element in SERP
                    url string sitelink URL
                    domain string domain in SERP
                dataset_providers array the list of institutions that provided the dataset
                    type string type of element = ‘dataset_providers_element’
                    title string name of the dataset provider
                    url string site URL of the dataset provider
                    domain string site domain of the dataset provider
                formats array the list of file formats of the dataset
                    type string type of element = ‘formats_element’
                    format string type of file format of the dataset
for example: zip, html, csv
                    size string file size in bytes
                authors array the list of authors of the dataset
                    type string type of element = ‘authors_element’
                    name string name of the dataset author
                    url string author’s link URL
                    domain string author’s link domain
                licenses array the list of licenses issued to the dataset
                    type string type of element = ‘licenses_element’
                    title string name of the license
                    url string license URL
                    domain string license page domain
                updated_date string date and time when the result was last updated
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2022-11-27 02:00:00 +00:00
                area_covered array the list of areas covered in the dataset
for example: Africa, Global
                period_covered object period covered in the dataset
                    start_date string date and time when the period starts
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2020-03-02 02:00:00 +00:00
                    end_date string date and time when the period ends
in the UTC format: “yyyy-mm-dd hh:mm:ss +00:00”
example:
2022-12-09 02:00:00 +00:00
                    displayed_date string period displayed in SERP
example:
Mar 2, 2020 - Dec 9, 2022
                dataset_description object description of the dataset
                    text string text of the description
                    links array links featured in the ‘dataset_description’
                       type string type of element = ‘link_element’
                       title string link anchor text
                       description string description of the results element in SERP
                       url string URL link
                       domain string domain in SERP
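
Putting the fields above together, the result array can be traversed roughly as follows (a sketch, assuming $result is the decoded associative array returned by the Live call shown earlier):

<?php
// A sketch of reading the documented fields; assumes $result is the decoded
// associative array returned by the RestClient call shown earlier.
foreach ($result['tasks'] as $task) {
    if ($task['status_code'] !== 20000 || empty($task['result'])) {
        continue; // skip failed or empty tasks
    }
    foreach ($task['result'] as $serp) {
        foreach ($serp['items'] as $item) {
            // every item returned by this endpoint has type 'dataset'
            $formats = array();
            foreach ((array) $item['formats'] as $format) {
                $formats[] = $format['format'];
            }
            printf(
                "%d. %s [%s] updated %s\n",
                $item['rank_absolute'],
                $item['title'],
                implode(', ', $formats),
                $item['updated_date']
            );
        }
    }
}
?>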
